#!/usr/bin/env python3
"""
verify_record_doubleflip.py
One-command acceptance + provenance for SIM:V2-TickChain-DoubleFlip.

Usage (from repo root):
  python verify_record_doubleflip.py           # record using existing results
  python verify_record_doubleflip.py --run     # run sim, then record
  python verify_record_doubleflip.py --run --commit --tag v2-doubleflip-accept-2025-08-19
"""
import argparse, csv, hashlib, json, os, subprocess, sys, platform
from pathlib import Path
from datetime import date
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

def sha256_file(path: Path) -> str:
    """Return the hexadecimal SHA-256 digest of the file at *path*.

    The file is read in 1 MiB pieces, so its whole content never has to be
    held in memory at once.
    """
    piece_size = 1 << 20
    digest = hashlib.sha256()
    with open(path, "rb") as stream:
        while True:
            piece = stream.read(piece_size)
            if not piece:
                # read() returns b"" at end of file.
                break
            digest.update(piece)
    return digest.hexdigest()

def _is_commit_hash(text: str) -> bool:
    """Tell whether *text* is a full hex object name (40 or 64 hex digits)."""
    return len(text) in (40, 64) and all(
        ch in "0123456789abcdef" for ch in text.lower()
    )


def detect_commit_sha(repo_root: Path) -> str:
    """Return the commit hash currently checked out in *repo_root*.

    ``git rev-parse HEAD`` is tried first. If git is missing or the command
    fails, ``<repo_root>/.git/HEAD`` is read directly; a symbolic ref is
    resolved through its loose ref file and then through ``.git/packed-refs``.
    A ``.git`` that is a file rather than a directory is not followed.

    Returns:
        The hash as a hex string, or 40 zeros when none can be determined
        (for example when *repo_root* is not a git repository).
    """
    no_commit = "0" * 40

    try:
        sha = subprocess.check_output(
            ["git", "-C", str(repo_root), "rev-parse", "HEAD"],
            text=True,
            # Keep git's "not a git repository" noise off the console; the
            # file-based fallback below handles that case.
            stderr=subprocess.DEVNULL,
        ).strip()
    except (OSError, subprocess.SubprocessError, UnicodeError):
        sha = ""
    if _is_commit_hash(sha):
        return sha

    # Fallback to .git files
    git_dir = Path(repo_root) / ".git"
    try:
        content = (git_dir / "HEAD").read_text(encoding="utf-8", errors="ignore").strip()
    except OSError:
        return no_commit

    if not content.startswith("ref:"):
        # Detached HEAD stores the hash itself.
        return content if _is_commit_hash(content) else no_commit

    ref = content.split()[-1]

    # Loose ref file, e.g. .git/refs/heads/main
    try:
        sha = (git_dir / ref).read_text(encoding="utf-8", errors="ignore").strip()
    except OSError:
        sha = ""
    if _is_commit_hash(sha):
        return sha

    # Packed refs: "<hash> <refname>" per line; "#" lines are headers and
    # "^" lines are peeled tag targets.
    try:
        packed = (git_dir / "packed-refs").read_text(encoding="utf-8", errors="ignore")
    except OSError:
        packed = ""
    for line in packed.splitlines():
        if line.startswith(("#", "^")):
            continue
        fields = line.split()
        if len(fields) == 2 and fields[1] == ref and _is_commit_hash(fields[0]):
            return fields[0]

    return no_commit  # if not a git repo

def build_run_env() -> str:
    """Describe the runtime as one ``"; "``-separated string.

    The string starts with the OS name and release and the Python version,
    followed by ``"<lib> <version>"`` for each of numpy, matplotlib and
    pandas that can be imported. Libraries that fail to import are left out;
    a library without ``__version__`` is reported as ``unknown``.
    """
    python_version = sys.version.split()[0]
    host = f"{platform.system()} {platform.release()}"
    fields = [f"{host}; Python {python_version}"]
    for name in ("numpy", "matplotlib", "pandas"):
        try:
            module = __import__(name)
            version = getattr(module, "__version__", "unknown")
            fields.append(f"{name} {version}")
        except Exception:
            # Best effort: an unavailable library is simply not reported.
            continue
    return "; ".join(fields)

def ks_2samp_stat(x, y):
    """Return the two-sample Kolmogorov-Smirnov statistic D for *x* and *y*.

    D is the largest absolute difference between the two empirical CDFs,
    evaluated at every observed value of either sample.

    Args:
        x: First sample (any array-like of numbers; flattened before use).
        y: Second sample (any array-like of numbers; flattened before use).

    Returns:
        D as a Python float in [0, 1].

    Raises:
        ValueError: If either sample is empty. Without this check an empty
            sample would divide by zero and produce NaN.
    """
    # axis=None flattens before sorting, so nested input is accepted too.
    x = np.sort(np.asarray(x), axis=None)
    y = np.sort(np.asarray(y), axis=None)
    if x.size == 0 or y.size == 0:
        raise ValueError("ks_2samp_stat requires two non-empty samples")

    pooled = np.concatenate([x, y])
    # side='right' counts elements <= value, i.e. the empirical CDF.
    cdf_x = np.searchsorted(x, pooled, side='right') / x.size
    cdf_y = np.searchsorted(y, pooled, side='right') / y.size
    return float(np.max(np.abs(cdf_x - cdf_y)))

def ks_pvalue_approx(D, n, m):
    """Approximate the two-sample KS p-value for statistic *D*.

    Evaluates the alternating series
    ``2 * sum_{j>=1} (-1)**(j-1) * exp(-2 * j**2 * lam**2)`` over its first
    199 terms, with ``lam = (sqrt(ne) + 0.12 + 0.11/sqrt(ne)) * D`` and
    effective sample size ``ne = n*m/(n+m)``, then clamps the sum to [0, 1].

    Args:
        D: KS statistic.
        n: Size of the first sample (must be positive).
        m: Size of the second sample (must be positive).

    Returns:
        The approximate p-value as a float in [0, 1], or NaN when *D* is NaN.

    Raises:
        ValueError: If *n* or *m* is not positive.
    """
    if n <= 0 or m <= 0:
        raise ValueError("sample sizes n and m must be positive")
    # NaN has to be handled before the clamp below: min(1.0, nan) evaluates
    # to 1.0, which would report an undefined statistic as a perfect match.
    if np.isnan(D):
        return float("nan")
    if D == 0:
        return 1.0

    ne = n*m/(n+m)
    root_ne = np.sqrt(ne)
    lam = (root_ne + 0.12 + 0.11/root_ne) * D

    s = 0.0
    for j in range(1, 200):
        term = 2 * np.exp(-2 * (j*j) * (lam*lam))
        # Odd j adds, even j subtracts.
        s += term if j % 2 else -term
    # An odd number of terms overshoots for very small lam; the clamp keeps
    # the result a valid probability.
    return float(max(0.0, min(1.0, s)))

def _find_sim_script(root: Path):
    """Return the repo-relative path of the tick-chain sim script, or None if absent."""
    for candidate in ("src/tick_chain.py", "tick_chain.py"):
        if (root / candidate).exists():
            return candidate
    return None


def main():
    """Verify the double-flip results, write acceptance artifacts, record provenance.

    Steps, all relative to the current working directory (the repo root):

    1. With ``--run``, execute the tick-chain sim with the given seed/trials/N.
    2. Load ``results/tick_chain_results.csv`` (columns ``Out2B`` and ``In2B``).
    3. Write the per-direction CSVs, an overlay histogram and a text summary
       into ``results/``.
    4. Write a JSON provenance record and append one row to the registry CSV.
    5. With ``--commit`` (and optionally ``--tag``), git-add/commit/tag the
       records; git failures are reported but not fatal.

    The status is ``VERIFIED`` when the KS p-value is >= 0.95 and the
    difference in the fraction of zero final states is <= 1e-3; otherwise
    ``HOLDING``. Either status is recorded and leaves the exit code at 0.

    Exits with status 2 when the sim script or its results are missing or
    unusable, or when the sim run fails.
    """
    parser = argparse.ArgumentParser(description="Accept + record SIM:V2-TickChain-DoubleFlip in one step")
    parser.add_argument("--sim-id", default="SIM:V2-TickChain-DoubleFlip-001")
    parser.add_argument("--repo-url", default="")
    parser.add_argument("--seed", type=int, default=20250818)
    parser.add_argument("--trials", type=int, default=10000)
    parser.add_argument("--N", type=int, default=5)
    parser.add_argument("--run", action="store_true", help="Run tick_chain.py before recording")
    parser.add_argument("--commit", action="store_true", help="git add/commit records")
    parser.add_argument("--tag", help="optional git tag name")
    parser.add_argument("--registry", default="AR_V1_sim_registry.csv")
    parser.add_argument("--json-out", default=None)
    args = parser.parse_args()

    root = Path(".").resolve()
    results = root / "results"
    results.mkdir(parents=True, exist_ok=True)

    # Acceptance thresholds (also spelled out in the summary text below).
    ks_p_min = 0.95
    delta_max = 1e-3

    # Read the date once so the record and the JSON file name cannot
    # disagree when the run straddles midnight.
    today = date.today().isoformat()

    # The same CLI string is used for the --run log line and as the
    # best-effort record of what produced the results.
    script = _find_sim_script(root)
    sim_cli = (
        f"python {script} --seed {args.seed} --trials {args.trials} --N {args.N}"
        if script else None
    )

    # 1) Optional: run the sim
    if args.run:
        if script is None:
            print("Couldn't find src/tick_chain.py or tick_chain.py. Put this script next to your sim.")
            sys.exit(2)
        print("[run]", sim_cli)
        # Run under this interpreter instead of whatever "python" resolves
        # to in a shell: run_env describes this interpreter, and "python"
        # may not exist where only "python3" is installed.
        run_argv = [
            sys.executable or "python", script,
            "--seed", str(args.seed),
            "--trials", str(args.trials),
            "--N", str(args.N),
        ]
        try:
            subprocess.check_call(run_argv, cwd=root)
        except (subprocess.CalledProcessError, OSError) as e:
            print("Run failed:", e)
            sys.exit(2)

    # 2) Load results
    csv_path = results / "tick_chain_results.csv"
    if not csv_path.exists():
        print("Missing results/tick_chain_results.csv. Run with --run or generate it first.")
        sys.exit(2)
    try:
        df = pd.read_csv(csv_path)
    except pd.errors.EmptyDataError:
        print(f"No data in {csv_path}; nothing to verify.")
        sys.exit(2)
    for col in ("Out2B", "In2B"):
        if col not in df.columns:
            print(f"Expected column '{col}' not in {csv_path}.")
            sys.exit(2)
    if df.empty:
        # The statistics and histogram bins below are undefined without rows.
        print(f"No data rows in {csv_path}; nothing to verify.")
        sys.exit(2)

    outward = df["Out2B"].to_numpy()
    inward = df["In2B"].to_numpy()

    # 3) Save acceptance artifacts
    fs_out = results / "final_states_outward.csv"
    fs_in = results / "final_states_inward.csv"
    pd.DataFrame({"final_state": outward}).to_csv(fs_out, index=False)
    pd.DataFrame({"final_state": inward}).to_csv(fs_in, index=False)

    D = ks_2samp_stat(outward, inward)
    p_ks = ks_pvalue_approx(D, len(outward), len(inward))
    p_out0 = float(np.mean(outward == 0))
    p_in0 = float(np.mean(inward == 0))
    delta = abs(p_out0 - p_in0)
    ks_ok = p_ks >= ks_p_min
    delta_ok = delta <= delta_max

    # Unit-width bins centred on integer values, shared by both histograms.
    bins = np.arange(min(outward.min(), inward.min()) - 0.5,
                     max(outward.max(), inward.max()) + 1.5, 1.0)
    plt.figure(figsize=(8, 6))
    plt.hist(outward, bins=bins, alpha=0.5, label="B(F(F(r0)))")
    plt.hist(inward, bins=bins, alpha=0.5, label="B(S(S(r0)))")
    plt.title("Final-State Distributions: Outward vs Inward (Projected)")
    plt.xlabel("final_state")
    plt.ylabel("count")
    plt.legend()
    plot_path = results / "final_states_overlay.png"
    plt.tight_layout()
    plt.savefig(plot_path)
    plt.close()

    # 4) Summary + provenance
    commit_sha = detect_commit_sha(root)
    run_env = build_run_env()

    # Best-effort CLI that produced these results
    config_ref = sim_cli if sim_cli else "NA"
    config_hash = hashlib.sha256(config_ref.encode("utf-8")).hexdigest() if config_ref != "NA" else "NA"
    status = "VERIFIED" if (ks_ok and delta_ok) else "HOLDING"

    summary = (
        f"Acceptance Summary — {args.sim_id}\n"
        f"Commit: {commit_sha}\n"
        f"Seed/CLI: {config_ref}\n"
        f"Trials (N): {len(outward)}\n"
        f"KS two-sample p-value: {p_ks:.6f}\n"
        f"Baseline: p_out(0)={p_out0:.6f}, p_in(0)={p_in0:.6f}, |Δ|={delta:.6e}\n"
        f"Pass: KS p>=0.95 — {'YES' if ks_ok else 'NO'}\n"
        f"Pass: |Δ|<=1e-3 — {'YES' if delta_ok else 'NO'}\n"
    )
    summary_path = results / "final_states_summary.txt"
    summary_path.write_text(summary, encoding="utf-8")

    record = {
        "sim_id": args.sim_id,
        "repo_url": args.repo_url,
        "commit_hash": commit_sha,
        "seed": str(args.seed),
        "config_ref": config_ref,
        "config_hash": config_hash,
        "run_env": run_env,
        "metrics": f"N={len(outward)}; KS_p={p_ks:.6f}; |p0_out-p0_in|={delta:.6e}",
        "result_summary": f"{'PASS' if status=='VERIFIED' else 'HOLDING'} — KS p={p_ks:.6f}; |p_out(0)-p_in(0)|={delta:.6e}",
        "date_ran": today,
        "status": status,
        "outputs_hashes": {
            "results/final_states_outward.csv": sha256_file(fs_out),
            "results/final_states_inward.csv": sha256_file(fs_in),
            "results/final_states_overlay.png": sha256_file(plot_path),
            "results/final_states_summary.txt": sha256_file(summary_path),
        },
    }

    # JSON sidecar
    prov_dir = root / "provenance"
    prov_dir.mkdir(exist_ok=True)
    json_out = args.json_out or f"provenance/{args.sim_id.replace(':','_')}_{today}.json"
    json_path = root / json_out
    # --json-out may point outside provenance/; make sure its folder exists.
    json_path.parent.mkdir(parents=True, exist_ok=True)
    json_path.write_text(json.dumps(record, indent=2), encoding="utf-8")

    # Registry CSV (append)
    reg = root / args.registry
    reg.parent.mkdir(parents=True, exist_ok=True)
    headers = ["sim_id", "repo_url", "commit_hash", "seed", "config_ref", "config_hash",
               "run_env", "metrics", "result_summary", "date_ran", "status", "outputs_hashes"]
    # A missing or zero-length registry still needs its header row.
    write_header = (not reg.exists()) or reg.stat().st_size == 0
    with open(reg, "a", newline="", encoding="utf-8") as f:
        w = csv.DictWriter(f, fieldnames=headers)
        if write_header:
            w.writeheader()
        row = record.copy()
        # The nested dict is stored as compact JSON inside a single CSV cell.
        row["outputs_hashes"] = json.dumps(record["outputs_hashes"], separators=(",", ":"))
        w.writerow(row)

    print(summary)
    print(f"Wrote: {json_path} and appended: {reg}")

    # 5) Optional: commit/tag the records
    if args.commit:
        try:
            to_add = ["results", args.registry, str(json_path)]
            for path in to_add:
                # Resolve against root, the directory git itself runs in.
                if (root / path).exists():
                    subprocess.check_call(["git", "add", path], cwd=root)
            msg = f"{args.sim_id} — {status}; KS p={p_ks:.6f}; Δ={delta:.3e}"
            subprocess.check_call(["git", "commit", "-m", msg], cwd=root)
            if args.tag:
                subprocess.check_call(["git", "tag", "-a", args.tag, "-m", msg], cwd=root)
            print("[git] committed" + (f" and tagged {args.tag}" if args.tag else ""))
        except Exception as e:
            # Committing is best-effort: the records are already on disk.
            print("[git] commit/tag skipped:", e)

# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
